################################################################################
### Replication Code: Data Cleaning 2016
################################################################################
#
# Paper: Rationalizing Protest Participation  
#
# Authors: Tim Baule, Jonathan Bothner, Maximilian Kähny
#
# Software: R version 4.4.0 using Windows
#
################################################################################


# Libraries
library(haven)   # read_dta / write_dta
library(dplyr)   # mutate / left_join / relocate

## Working directory ----------------------------------------------------------
# Start from an empty global environment (removes objects only; attached
# packages stay attached).
rm(list = ls())

# Folder holding the input .dta files. The default is the authors' original
# location; set the environment variable ANES_PROJECT_DIR to run the script
# against a copy of the replication files stored elsewhere.
project_dir <- Sys.getenv(
  "ANES_PROJECT_DIR",
  unset = "C:/Users/timba/OneDrive - Universität Bayreuth (1)/Uni/Research General/Protest Network Project/empirics/do files and skripts/final"
)

# Only change directory when the folder exists, so the script can also be
# started from inside the replication folder on other machines instead of
# stopping with a setwd() error.
if (dir.exists(project_dir)) {
  setwd(project_dir)
} else {
  message(
    "Project directory not found; using current working directory: ",
    getwd()
  )
}

## 1.-2. Load data and rename ---------------------------------------------------
# Read the raw ANES 2016 time-series file and, in the same pipeline, give the
# state identifier column V161010d its working name state_id.
anes16 <- "anes_timeseries_2016.dta" %>%
  read_dta() %>%
  rename(state_id = V161010d)

## 3. Merge m:1 on state_id; keep only longitude/latitude ---------------------
# Lookup table: state_id plus the two coordinate columns.
state_coords <- read_dta("state_coordinates.dta") %>%
  select(state_id, longitude, latitude)

# An m:1 merge requires state_id to identify the rows of the lookup table
# uniquely. left_join() does not enforce this and would duplicate respondent
# rows for any repeated state_id, so stop before that can happen.
if (n_distinct(state_coords$state_id) != nrow(state_coords)) {
  stop(
    "state_coordinates.dta contains duplicate state_id values; ",
    "the m:1 merge would duplicate respondents.",
    call. = FALSE
  )
}

# Every respondent is kept; respondents without a matching state_id get NA
# coordinates. The any_of() drop only acts if a column named `_merge` exists.
anes16 <- anes16 %>%
  left_join(state_coords, by = "state_id") %>%
  select(-any_of("_merge"))

## Zero-pad state_id to a two-character string ---------------------------------
# Codes below 10 get a leading "0" (e.g. 9 -> "09"); all other codes are just
# converted to character. A missing code stays NA: pasting the pieces together
# unconditionally would yield the literal string "NANA", which complete.cases()
# further down would count as an observed value.
anes16 <- anes16 %>%
  mutate(
    state_id = if_else(
      is.na(state_id),
      NA_character_,
      paste0(if_else(state_id < 10, "0", ""), as.character(state_id))
    )
  ) %>%
  # The padded column used to be dropped and re-created, which placed it last;
  # keep that position so the column order of the saved file does not change.
  relocate(state_id, .after = last_col())


## Protest participation ---------------------------------------------------
# Indicator built from V162018a: NA for the codes -9, -7 and -6, 1 when the
# code is 1, and 0 for every other code. Rows whose indicator is NA (including
# rows where V162018a itself is NA) are removed from the data.
anes16 <- anes16 %>%
  mutate(
    protest = if_else(
      V162018a %in% c(-9, -7, -6),
      NA_real_,
      if_else(V162018a == 1, 1, 0)
    )
  ) %>%
  filter(!is.na(protest))

## Network variables -------------------------------------------------------
# Recode the raw survey items into analysis variables. Each variable is built
# step by step: later assignments inside mutate() overwrite earlier ones.
anes16 <- anes16 %>%
  mutate(
    # education: code 90 is folded into category 9; codes 95 and -9 are missing
    # NOTE(review): other negative codes, if any exist in V161270, are kept
    # as-is -- confirm against the codebook.
    educ = V161270,
    educ = if_else(educ == 90, 9, educ),
    educ = if_else(educ %in% c(95, -9), NA_real_, educ),

    # age: every negative code is missing
    age = V161267,
    age = if_else(age < 0, NA_real_, age),

    # left/right: every negative code is missing
    leftright = V162289,
    leftright = if_else(leftright < 0, NA_real_, leftright),
    # absolute distance from the scale value 5. leftright is already NA for
    # negative codes, so leftright2 inherits those NAs and needs no separate
    # missing-value recode.
    leftright2 = abs(5 - leftright),

    # religion (as defined by two questions): 0 by default, NA when V161241 is
    # negative; when V161241 == 1 the value 1, 2 or 3 is taken from V161242
    # NOTE(review): V161241 == 1 combined with any other V161242 code leaves
    # reli at 0 -- confirm this is intended.
    reli = 0,
    reli = if_else(V161241 < 0, NA_real_, reli),
    reli = if_else(V161241 == 1 & V161242 == 1, 1, reli),
    reli = if_else(V161241 == 1 & V161242 == 2, 2, reli),
    reli = if_else(V161241 == 1 & V161242 == 3, 3, reli),

    # trust: reverse-coded from V161219 (4 -> 1, 3 -> 2, 2 -> 3, 1 -> 4); any
    # other non-negative code keeps the default 0; negative codes are missing
    trust = 0,
    trust = if_else(V161219 == 4, 1, trust),
    trust = if_else(V161219 == 3, 2, trust),
    trust = if_else(V161219 == 2, 3, trust),
    trust = if_else(V161219 == 1, 4, trust),
    trust = if_else(V161219 < 0, NA_real_, trust)
  )

## Individual variables ----------------------------------------------------
# Respondent-level controls. The 0/1 indicators share one pattern: NA when the
# listed missing codes occur, otherwise 1 if the "yes" condition holds and 0 if
# it does not. Pass-through variables keep the raw value except for the listed
# missing codes.
# NOTE(review): the missing codes differ between items (volunteer uses -7/-6,
# community_service and attend_church use -8/-9) -- confirm against the
# codebook that this is intended.
anes16 <- anes16 %>%
  mutate(
    # ethnicity: 1 when V161310x is 2; code -2 is missing
    black = if_else(
      V161310x == -2,
      NA_real_,
      if_else(V161310x == 2, 1, 0)
    ),

    # income: raw V161361x with codes -9 and -5 set to missing
    inc = if_else(V161361x %in% c(-9, -5), NA_real_, V161361x),

    # sex/gender: 1 when V161342 is 1; code -9 is missing
    male = if_else(
      V161342 == -9,
      NA_real_,
      if_else(V161342 == 1, 1, 0)
    ),

    # health condition: raw V161115 with code -9 set to missing
    health = if_else(V161115 == -9, NA_real_, V161115),

    # married or domestic partnership: 1 when V161268 is 1 or 2, or V161269
    # is 1; missing when either item is -9
    married = if_else(
      V161268 == -9 | V161269 == -9,
      NA_real_,
      if_else(V161268 %in% c(1, 2) | V161269 == 1, 1, 0)
    ),

    # children: raw V161324 with code -9 set to missing
    children = if_else(V161324 == -9, NA_real_, V161324),

    # volunteer work: 1 when V162197 is 1; codes -7 and -6 are missing
    volunteer = if_else(
      V162197 == -7 | V162197 == -6,
      NA_real_,
      if_else(V162197 == 1, 1, 0)
    ),

    # community service: 1 when V162196 is 1; codes -8 and -9 are missing
    community_service = if_else(
      V162196 == -8 | V162196 == -9,
      NA_real_,
      if_else(V162196 == 1, 1, 0)
    ),

    # attend church: 1 when V161244 is 1; codes -8 and -9 are missing
    attend_church = if_else(
      V161244 == -8 | V161244 == -9,
      NA_real_,
      if_else(V161244 == 1, 1, 0)
    )
  )


# FE for census region ---------------------------------------------------------
# state_id is a zero-padded two-character string at this point ("01", "09",
# "10", ...). The numeric state codes are therefore padded the same way before
# matching: `%in%` would turn bare numbers into "1", "5", "9", which never
# equal "01", "05", "09", and those states would silently land in "West".
northeast_ids <- sprintf("%02d", c(23L, 33L, 50L, 25L, 44L, 9L, 36L, 42L, 34L))
midwest_ids <- sprintf(
  "%02d",
  c(39L, 18L, 17L, 26L, 55L, 27L, 19L, 29L, 38L, 46L, 31L, 20L)
)
south_ids <- sprintf(
  "%02d",
  c(10L, 24L, 11L, 51L, 54L, 37L, 45L, 13L, 12L, 21L, 47L, 1L, 28L, 5L, 22L,
    40L, 48L)
)

anes16 <- anes16 %>%
  mutate(
    census_region = case_when(
      state_id %in% northeast_ids ~ "Northeast",
      state_id %in% midwest_ids ~ "Midwest",
      state_id %in% south_ids ~ "South",
      # West is the catch-all: every state_id not listed above ends up here
      TRUE ~ "West"
    ),
    # explicit factor so the level order (and thus the baseline) is controlled
    census_region = factor(
      census_region,
      levels = c("Northeast", "Midwest", "South", "West")
    ),
    # three region dummies; West has no dummy and is the omitted category
    dummy_NE = as.integer(census_region == "Northeast"),
    dummy_MW = as.integer(census_region == "Midwest"),
    dummy_South = as.integer(census_region == "South")
  )


# Variables that must all be observed for a row to stay in the sample
state_id_var <- "state_id"
all_vars <- c(
  "protest",
  "leftright", "leftright2", "reli", "trust",
  "age", "educ", "inc",
  "longitude", "latitude",
  "black", "male", "health", "married", "children",
  "volunteer", "community_service", "attend_church",
  state_id_var
)

# Flag rows without any NA among the listed variables and keep only those rows
cc <- complete.cases(select(anes16, all_of(all_vars)))
anes_cc <- slice(anes16, which(cc))


# Convert educ, inc and reli with as.numeric() before writing the file
anes_cc <- anes_cc %>%
  mutate(across(all_of(c("educ", "inc", "reli")), as.numeric))

## Save --------------------------------------------------------------------
# Write the complete-case sample as a Stata file into the working directory
write_dta(data = anes_cc, path = "anes_timeseries_2016_clean.dta")


